"""
1. Create the project
2. Create the project files
3.
"""

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from ..items import ReadbookItem


class ReadSpider(CrawlSpider):
    """Crawl paginated listing pages under ``/book/1617`` on www.dushu.com.

    Links matching ``/book/1617_<n>.html`` are extracted from the start page
    and handed to :meth:`parse_item`, which yields one ``ReadbookItem`` per
    ``<img>`` found inside the ``bookslist`` container.
    """

    name = "read"
    allowed_domains = ["www.dushu.com"]
    start_urls = ["https://www.dushu.com/book/1617.html"]

    # follow=False: links found on the matched pages are not extracted again;
    # follow=True would keep applying this rule to every page it reaches.
    # NOTE(review): the start URL ("1617.html") does not match the allow
    # pattern, and CrawlSpider sends start responses to parse_start_url rather
    # than to rule callbacks, so the start page's own images are presumably
    # never parsed -- confirm whether that is intended.
    rules = (
        Rule(
            LinkExtractor(allow=r"/book/1617_\d+\.html"),
            callback="parse_item",
            follow=False,
        ),
    )

    def parse_item(self, response):
        """Yield a ``ReadbookItem`` for every image in the book list.

        Args:
            response: The downloaded listing page.

        Yields:
            ReadbookItem: ``src`` is the absolute image URL (``None`` when the
            tag carries no URL at all) and ``name`` is the image's ``alt``
            text (``None`` when absent).
        """
        for img in response.xpath('//div[@class="bookslist"]//img'):
            # Prefer the lazy-load attribute; fall back to the plain ``src``
            # for images that are not lazy-loaded, so they still get a URL.
            raw_src = (
                img.xpath('./@data-original').extract_first()
                or img.xpath('./@src').extract_first()
            )
            # urljoin leaves absolute URLs untouched and resolves relative or
            # protocol-relative ones against the page URL.
            src = response.urljoin(raw_src) if raw_src else None
            name = img.xpath('./@alt').extract_first()
            yield ReadbookItem(src=src, name=name)

